package com.cherrish.demo;

import java.io.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * @author cherrish
 * @time 2019-04-04 9:24
 * @name Robot
 * @desc:
 */
public class Robot {
    public static void main(String[] args) {
        crawler();
    }

    private static void crawler(){
        //String regex = "http://[\\w+\\.?/?]+\\.[A-Za-z]+";
        //url匹配规则
        String regex = "https://[\\w+\\.?/?]+\\.[A-Za-z]+";
        URL url = null;
        URLConnection urlconn = null;
        BufferedReader br = null;
        PrintWriter pw = null;

        Pattern p = Pattern.compile(regex);

        try {
            url = new URL("https://www.rndsystems.com/cn");
            urlconn = url.openConnection();
            pw = new PrintWriter(new FileWriter("D:\\workspace\\test\\001\\do-demo\\demo-spider\\target\\SiteUrl.txt"), true);
            br = new BufferedReader(new InputStreamReader(urlconn.getInputStream()));
            String buf = null;
            while (null != (buf = br.readLine())){
                Matcher matcher = p.matcher(buf);
                while (matcher.find()){
                    pw.println(matcher.group());
                }
            }
            System.out.println("Success");
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
